## library(devtools)
## library(BiocManager)
library(SingleCellExperiment)
library(ggplot2)
library(scFeatures) ## devtools::install_github("SydneyBioX/scFeatures")
library(ClassifyR) ## BiocManager::install("ClassifyR", dependencies = TRUE)
library(lisaClust)
library(ggthemes)
library(spicyR) ## BiocManager::install("spicyR")
library(dplyr)
library(limma)
library(plotly)
library(scattermore)
library(tidyr)
library(survival)
library(survminer)
library(spatstat)
library(scater)
library(scran)
library(reshape)
theme_set(theme_classic())
# code for plotting purpose
plot_boxplot <- function( feature ){
data_plot <- t(feature)
data_plot <- melt(data_plot )
colnames(data_plot) <- c("X1", "X2", "value")
data_plot$condition <- unlist( lapply( strsplit( as.character( data_plot$X2), "_cond_"), `[`, 2))
p <- ggplot(data_plot, aes( x = X1, y = value , colour = condition)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
return(p)
}
plot_barplot <- function(data , dodge=F ){
data$patient <- unlist( lapply( strsplit( rownames(data ), "_cond_"), `[`, 1))
data$condition <- unlist( lapply( strsplit( rownames(data ), "_cond_"), `[`, 2))
data <- as.data.frame( melt(data, id=c("patient", "condition")) )
p <- ggplot(data , aes( x = patient , y = value , fill = variable) ) +
geom_bar(stat="identity" ) + facet_wrap(~condition, scale="free") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
return (p)
}
draw_dotplot <- function(data_sce, sample, celltype , group , group_of_interest ){
df <- data.frame(colData( data_sce))
# for each patient, calculate the proportion of each cell type in each region.
df_plot <- NULL
for ( thispatient in unique(df[[sample]])){
this_df <- df[df[[sample]]== thispatient, ]
temp_df <- table( this_df$region, this_df[[celltype]] )
temp_df <- temp_df / rowSums(temp_df)
temp_df <- data.frame( temp_df)
temp_df$patient <- thispatient
temp_df$group <- clinical[clinical[[sample]] == thispatient, group ]
df_plot <- rbind(df_plot, temp_df)
}
# for each region, calculate the average proportion of each cell type across all individuals
df_plot <- df_plot %>% dplyr::group_by( Var1 , Var2, group) %>%
summarise(mean_proportion = mean(Freq))
# we are only interested in the short term and long term survival individuals
df_plot <- df_plot [ df_plot$group %in% group_of_interest, ]
p <- ggplot(df_plot, aes(y = Var2, x = Var1 ,colour =mean_proportion , size = mean_proportion ))+
geom_point() +
facet_grid(~group, scales = "free", space = "free" ) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
xlab("Region" ) + ylab("Cell type") + scale_colour_viridis_c()
return(p)
}
draw_region_clustering_result <- function( data_sce, sample , selected_sample ){
# get meta data for a selected patient to visualise
metadata <- colData(data_sce)
metadata <- metadata[metadata[[sample]] == selected_sample , ]
metadata <- data.frame(metadata)
plotlist <- list() # define the list to store images for region highlighting
plotlist_celltype <- list() # define the list to store images for cell-type highlighting
# optional: define a colour palette
# you can also specify your own colour to use in the variable color_codes
tableau_palette <- scale_colour_tableau( palette = "Tableau 20")
color_codes <- tableau_palette$palette(18)
color_codes <- c(color_codes, "darkgrey") # for all other regions apart from region of interest, make the colour grey
names(color_codes) <- c(unique(metadata$description) , "other regions")
# look through each region to highlight the region of interest, as well as the cell type in the region of interest
for ( thisregion in sort(unique(metadata$region))){
# select the region of interest
selected_region_index <- metadata$region == thisregion
# for all other regions, define them as "other regions" so that they will be greyed out
metadata$selected_region <- "other regions"
metadata$selected_region[selected_region_index] <- "selected region"
# for all cell types outside the region of the interest, also make them greyed out
metadata$celltype <- metadata$description
metadata$celltype[!selected_region_index ] <- "other regions"
metadata$celltype <- factor(metadata$celltype, levels = c(unique(metadata$description), "other regions"))
# plot ggplot highlighting the region of interest
p <- ggplot(metadata, aes(x = Location_Center_X , y = Location_Center_Y , colour = selected_region )) + geom_point(alpha = 0.7) + ggtitle(thisregion) + scale_colour_manual(values = c("grey" , "red"))
# plot ggplot highlighting the cell-types in the region of interest
p2 <- ggplot(metadata, aes(x = Location_Center_X , y = Location_Center_Y , colour = celltype )) + geom_point(alpha = 0.7 ) + ggtitle(thisregion) + scale_colour_manual(values = color_codes)
plotlist [[thisregion ]] <- p
plotlist_celltype [[thisregion ]] <- p2
}
a <- ggarrange(plotlist = plotlist , ncol = 5, nrow = 1 , common.legend = T )
b <- ggarrange(plotlist = plotlist_celltype , ncol = 5, nrow = 1 , common.legend = T )
return (list(a,b))
}
draw_selected_region_boxplot <- function(data_sce, sample , celltype, group , group_of_interest, select_region){
df <- clinical[colData(data_sce)[, sample ], ]
df$region <- data_sce$region
df[[celltype]] <- data_sce[[celltype]]
df_plot <- NULL
for ( thispatient in unique(df[[sample]])){
this_df <- df[df[[sample]] == thispatient, ]
temp_df <- table( this_df$region, this_df[[celltype]] )
temp_df <- temp_df / rowSums(temp_df)
temp_df <- data.frame( temp_df)
temp_df$patient <- thispatient
temp_df$group <- unique( this_df[[group]])
df_plot <- rbind(df_plot, temp_df)
}
df_plot_region <- df_plot[df_plot$Var1 == select_region , ]
df_plot_region <- df_plot_region [ df_plot_region$group %in% group_of_interest, ]
p <- ggplot(df_plot_region, aes(x = Var2, y = Freq, colour = group)) +
geom_boxplot()+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
ylab("Proportion") + xlab("Cell type") + ggtitle("Selected region") + ylim(0, 0.25)
return(p)
}
The recent development of spatial omics allows us to examine the spatial organisation of cells within tissues in their native environment. This workshop will discuss the challenges and analytical skills associated working with multi-sample spatial datasets and their use in disease risk prognosis. We will also introduce general analytical strategies and critical thinking questions that arise in the workflow.
sessionInfo at the end of this document to ensure you are
using the correct version.scFeaturesClassifyRBIDCellNote: This data workshop offers participants the
opportunity to engage in hands-on analysis using R. However, if you are
not comfortable with coding in R, you can still acquire valuable
analytical skills by reviewing the output we provide in this file.
Structure of the 3-hour workshop:
| Activity | Time |
|---|---|
| Part 1: Introduction | 20m |
| Part 2: Exploring spatial data | 20m |
| Part 3: Feature engineering with scFeatures | 30m |
| Break | 15m |
| Part 4: Survival analysis with ClassifyR | 15m |
| Part 5. Identify cohort heterogeneity | 15m |
| Part 6: Cell segmentation with BIDCell | 20m |
| Concluding remarks |
A subset of the widely-known METABRIC breast cancer cohort has recently been analyzed using imaging mass cytometry: Imaging Mass Cytometry and Multiplatform Genomics Define the Phenogenomic Landscape of Breast Cancer. The question we are asking is: can their risk of recurrence be accurately estimated and therefore inform how aggressively they need to be treated? The other component of the analysis is patient clinical data, which has been sourced from Supplementary Table 5 of Dynamics of Breast-cancer Relapse Reveal Late-recurring ER-positive Genomic Subgroups.
## Quick look at data
assay(data_sce)[1:5, 1:5]
## MB-0002:345:93 MB-0002:345:107 MB-0002:345:113 MB-0002:345:114
## HH3_total 4.82032390 3.8229411 2.55845170 4.82208870
## CK19 1.21185160 1.3223530 0.13832258 0.33366668
## CK8_18 2.80274720 2.4720588 0.60545164 2.43351100
## Twist 0.14651649 0.1176471 0.09877419 0.20791112
## CD68 0.07863736 0.1016471 0.03225806 0.05237778
## MB-0002:345:125
## HH3_total 2.41391110
## CK19 0.07055555
## CK8_18 0.20724444
## Twist 0.12004445
## CD68 0.00000000
Basic characteristics of the data objects:
- The dataset contains 38 proteins and 76307 cells.
- The outcome is recurrence-free survival.
DT::datatable(data.frame(colData(data_sce)))